# clear environment
rm(list=ls())

# load packages
library(openxlsx)
library(ggplot2)

# load data set with chapter lengths and crime numbers
dat = openxlsx::read.xlsx("vsberichte_metadata.xlsx", sheet = 1)

# binary varibable indicating whether interior minister is from a center-right party
dat$cr_intmin <- ifelse(dat$intmin_party == "cdu" | dat$intmin_party== "csu" | dat$intmin_party=="fdp", 1, ifelse(dat$intmin_party=="pro", NA, 0))

# remove those without center-right or center-left interior minister
dat <- dat[!is.na(dat$cr_intmin),]

# create chapter length variable
dat$rwe_chapter_length <- dat$page_right_end-dat$page_right_start
dat$lwe_chapter_length <- dat$page_left_end-dat$page_left_start

# ratio of chapter length
dat$rwe_lwe_chapter_ratio_log <- log(dat$rwe_chapter_length + 0.5) - log(dat$lwe_chapter_length + 0.5)

# remove those with NA in chapter ratio
dat <- dat[!is.na(dat$rwe_lwe_chapter_ratio_log),]

# logged crime ratio
dat$rwe_lwe_crime_ratio_log <- log(dat$extreme_crime_right + 0.5) - log(dat$extreme_crime_left + 0.5)

# get crime ratio on federal level as a separate variable and merge with the main data 
crime_fed <- dat[dat$jurisdiction=="bund",c("year", "extreme_crime_right", "extreme_crime_left")]
names(crime_fed) <- c("year", "extreme_crime_right_fed", "extreme_crime_left_fed")
crime_fed$rwe_lwe_crime_ratio_log_fed <- log(crime_fed$extreme_crime_right_fed + 0.5) - log(crime_fed$extreme_crime_left_fed + 0.5)
dat <- merge(dat, crime_fed, by="year", all.x=T)

# impute missing values in the logged crime ratio with value on the federal level
dat$rwe_lwe_crime_ratio_log_fedimp <- ifelse(is.na(dat$rwe_lwe_crime_ratio_log),
                                             dat$rwe_lwe_crime_ratio_log_fed,
                                             dat$rwe_lwe_crime_ratio_log)

# create bias measure (difference between logged chapter length ratio and looged crime ratio)
dat$bias_ratio_fedimp <- dat$rwe_lwe_chapter_ratio_log - dat$rwe_lwe_crime_ratio_log_fedimp

# create decade indicator
dat$decade <- NA
dat$decade[dat$year<1960]<-1950
dat$decade[dat$year>=1960 & dat$year<1970]<-1960
dat$decade[dat$year>=1970 & dat$year<1980]<-1970
dat$decade[dat$year>=1980 & dat$year<1990]<-1980
dat$decade[dat$year>=1990 & dat$year<2000]<-1990
dat$decade[dat$year>=2000 & dat$year<2010]<-2000
dat$decade[dat$year>=2010 & dat$year<2020]<-2010
dat$decade[dat$year>=2020 & dat$year<2030]<-2020


## regressions models
# position
mod_1 <- lm(rwe_lwe_chapter_ratio_log ~ factor(cr_intmin), data=dat)
mod_2 <- lm(rwe_lwe_chapter_ratio_log ~ factor(cr_intmin) + factor(jurisdiction), data=dat)
mod_3 <- lm(rwe_lwe_chapter_ratio_log ~ factor(cr_intmin) + factor(jurisdiction) + factor(decade), data=dat)
mod_4 <- lm(rwe_lwe_chapter_ratio_log ~ factor(cr_intmin) + factor(jurisdiction) + factor(year), data=dat)
# bias
mod_5 <- lm(bias_ratio_fedimp ~ factor(cr_intmin), data=dat)
mod_6 <- lm(bias_ratio_fedimp ~ factor(cr_intmin) + factor(jurisdiction), data=dat)
mod_7 <- lm(bias_ratio_fedimp ~ factor(cr_intmin) + factor(jurisdiction) + factor(decade), data=dat)
mod_8 <- lm(bias_ratio_fedimp ~ factor(cr_intmin) + factor(jurisdiction) + factor(year), data=dat)


## coefficient plot
ests_df = rbind.data.frame(summary(mod_1)$coefficients[2,1:2],
                           summary(mod_4)$coefficients[2,1:2],
                           summary(mod_5)$coefficients[2,1:2],
                           summary(mod_8)$coefficients[2,1:2])
names(ests_df) = c("coef", "se")
ests_df$outcome = c("Position", "Position", "Bias", "Bias")
ests_df$outcome = factor(ests_df$outcome, levels=c("Position", "Bias"))
ests_df$mod = c("w/o Fixed Effects", "w/ Year and State Fixed Effects", "w/o Fixed Effects", "w/ Year and State Fixed Effects")
ests_df$mod = factor(ests_df$mod, levels = c("w/o Fixed Effects", "w/ Year and State Fixed Effects"))

ggplot(data=ests_df, aes(x=outcome, y=coef, group=factor(mod), shape=factor(mod))) + 
  geom_errorbar(aes(ymax=coef+1.96*se, ymin=coef-1.96*se), width=0, position = position_dodge(width = 0.5)) +
  geom_point(position=position_dodge(width=.5), size=2.5) + 
  theme_classic() +
  #  theme_bw() +
  theme(panel.grid = element_blank()) +
  geom_hline(yintercept = 0, linetype="dashed") +
  theme(legend.position = "bottom", text = element_text(size=18)) +
  xlab("Outcome Variable") + ylab("Coefficient of Center-Right\nInterior Minister") +
  scale_shape_manual(values=c(16, 17), name="")

ggsave("Fig4_2.pdf", width = 7, height = 5)
